Linear Regression
Linear regression for high income countries
grp = income_grp[1]
dat2 <- get_data(dat, grp)
fit <- lm(IT.NET.USER.ZS ~ year, data=dat2)
lmdls[[grp]] <- fit
summary(fit)
##
## Call:
## lm(formula = IT.NET.USER.ZS ~ year, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.464 -9.904 -0.856 9.171 47.032
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.078e+03 6.832e+01 -103.6 <2e-16 ***
## year 3.552e+00 3.408e-02 104.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.94 on 2143 degrees of freedom
## Multiple R-squared: 0.8352, Adjusted R-squared: 0.8351
## F-statistic: 1.086e+04 on 1 and 2143 DF, p-value: < 2.2e-16
pyear <- seq(min(dat2$year), max(dat2$year), 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 1), tck = 1, lty = 4, col = "lightgray", lwd = 0.5)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.5)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1)

Linear regression for upper middle income countries
grp = income_grp[4]
dat2 <- get_data(dat, grp)
fit <- lm(IT.NET.USER.ZS ~ year, data=dat2)
lmdls[[grp]] <- fit
summary(fit)
##
## Call:
## lm(formula = IT.NET.USER.ZS ~ year, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.680 -8.094 -0.092 9.467 36.576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.785e+03 7.244e+01 -66.06 <2e-16 ***
## year 2.397e+00 3.613e-02 66.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.37 on 1502 degrees of freedom
## Multiple R-squared: 0.7456, Adjusted R-squared: 0.7454
## F-statistic: 4402 on 1 and 1502 DF, p-value: < 2.2e-16
pyear <- seq(min(dat2$year), 2040, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 1), tck = 1, lty = 4, col = "lightgray", lwd = 0.5)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.5)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1)

Linear regression for lower middle income countries
grp = income_grp[3]
dat2 <- get_data(dat, grp)
fit <- lm(IT.NET.USER.ZS ~ year, data=dat2)
lmdls[[grp]] <- fit
summary(fit)
##
## Call:
## lm(formula = IT.NET.USER.ZS ~ year, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.566 -7.260 -1.532 5.408 52.033
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.773e+03 6.312e+01 -43.94 <2e-16 ***
## year 1.389e+00 3.148e-02 44.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.13 on 1593 degrees of freedom
## Multiple R-squared: 0.5498, Adjusted R-squared: 0.5496
## F-statistic: 1946 on 1 and 1593 DF, p-value: < 2.2e-16
pyear <- seq(min(dat2$year), 2070, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.75)
axis(1, at = seq(min(pyear), max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Linear regression for low income countries
grp = income_grp[2]
dat2 <- get_data(dat, grp)
fit <- lm(IT.NET.USER.ZS ~ year, data=dat2)
lmdls[[grp]] <- fit
summary(fit)
##
## Call:
## lm(formula = IT.NET.USER.ZS ~ year, data = dat2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8467 -3.3289 -0.8389 2.2925 24.7884
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.036e+03 4.102e+01 -25.25 <2e-16 ***
## year 5.187e-01 2.046e-02 25.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.132 on 794 degrees of freedom
## Multiple R-squared: 0.4472, Adjusted R-squared: 0.4465
## F-statistic: 642.4 on 1 and 794 DF, p-value: < 2.2e-16
pyear <- seq(min(dat2$year), 2190, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.75)
axis(1, at = seq(min(pyear), max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Summary of linear regression with average values
pyear <- seq(1990, 2190, 1)
plot(x=pyear, y=c(), xlim = c(min(pyear), max(pyear)), ylim=c(0,100),
ylab = "IT.NET.USER.ZS", xlab = "year", xaxt='n',yaxt='n')
axis(1, at = seq(min(pyear),max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
colors <- c("blue","red","orange","green")
names(colors) <- income_grp
for (grp in income_grp) {
gdat <- get_data(dat, grp, is.mean = T)
points(x=gdat$year, y=gdat$IT.NET.USER.ZS, pch=16, col=colors[grp], xaxt='n', yaxt='n', cex=0.75)
matlines(pyear, predict(lmdls[[grp]], newdata=list(year=pyear), interval="confidence"), lwd=1)
}
legend("bottomright", legend=c("high income","low income","lower middle income","upper middle income"),
col=colors, lty=0, cex = 0.75, bg = "transparent", pch=16, box.lty=0)

pyear <- seq(1990, 2190, 1)
plot(x=pyear, y=c(), xlim = c(min(pyear), max(pyear)), ylim=c(0,100),
ylab = "IT.NET.USER.ZS", xlab = "year", xaxt='n',yaxt='n')
axis(1, at = seq(min(pyear),max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
colors <- c("blue","red","orange","green")
names(colors) <- income_grp
for (grp in income_grp) {
gdat <- get_data(dat, grp, is.mean = F)
points(x=gdat$year, y=gdat$IT.NET.USER.ZS, pch=16, col=colors[grp], xaxt='n', yaxt='n', cex=0.25)
matlines(pyear, col = colors[grp], predict(lmdls[[grp]], newdata=list(year=pyear), interval="confidence"), lwd=1)
}
legend("bottomright", legend=c("high income","low income","lower middle income","upper middle income"),
col=colors, lty=0, cex = 0.75, bg = "transparent", pch=16, box.lty=0)

Logistic Regression
Logistic regression for high income countries
grp = income_grp[1]
dat2 <- get_data(dat, grp)
fit <- nls(IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal), data=dat2)
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 85.8351 0.9320 92.10 <2e-16 ***
## xmid 2004.7754 0.1508 13291.23 <2e-16 ***
## scal 3.8719 0.1144 33.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.44 on 2142 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 8.016e-06
Asym = fit$m$getAllPars()["Asym"]
xmid = fit$m$getAllPars()["xmid"]
scal = fit$m$getAllPars()["scal"]
(min(dat2$year)+(xmid-min(dat2$year))*100/Asym)
## xmid
## 2007.214
fit <- nls(IT.NET.USER.ZS ~ 100/(1+exp(-1*(year-2007.214)/scal)), data = dat2, start = list(scal=scal))
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ 100/(1 + exp(-1 * (year - 2007.214)/scal))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## scal 5.15269 0.08881 58.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.86 on 2144 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 4.168e-06
pyear <- seq(max(dat2$year), max(dat2$year)+35, 1)
myear <- min(pyear)+sum(predict(fit, newdata=list(year=pyear)) < 99.5)
pyear <- seq(myear-2,myear+2, 1)
df <- predict(fit, newdata=list(year=pyear), interval = "confidence")
names(df) <- pyear
(df)
## 2033 2034 2035 2036 2037
## 99.33362 99.45052 99.54701 99.62662 99.69229
pyear <- seq(min(dat2$year), 2036, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 2), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Logistic regression for upper middle income countries
grp = income_grp[4]
dat2 <- get_data(dat, grp)
fit <- nls(IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal), data=dat2)
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 88.4304 3.8442 23.00 <2e-16 ***
## xmid 2013.6320 0.4798 4196.40 <2e-16 ***
## scal 4.6477 0.1970 23.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.794 on 1501 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 5.134e-06
Asym = fit$m$getAllPars()["Asym"]
xmid = fit$m$getAllPars()["xmid"]
scal = fit$m$getAllPars()["scal"]
(min(dat2$year)+(xmid-min(dat2$year))*100/Asym)
## xmid
## 2016.724
fit <- nls(IT.NET.USER.ZS ~ 100/(1+exp(-1*(year-2016.724)/scal)), data = dat2, start = list(scal=scal))
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ 100/(1 + exp(-1 * (year - 2016.724)/scal))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## scal 6.1277 0.1131 54.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.64 on 1503 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 3.723e-06
pyear <- seq(max(dat2$year), max(dat2$year)+50, 1)
myear <- min(pyear)+sum(predict(fit, newdata=list(year=pyear)) < 99.5)
pyear <- seq(myear-2,myear+2, 1)
df <- predict(fit, newdata=list(year=pyear), interval = "confidence")
names(df) <- pyear
(df)
## 2048 2049 2050 2051 2052
## 99.39647 99.48688 99.56380 99.62924 99.68489
pyear <- seq(min(dat2$year), 2050, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 4), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Logistic regression for lower middle income countries
grp = income_grp[3]
dat2 <- get_data(dat, grp)
fit <- nls(IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal), data=dat2)
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 92.8126 15.1584 6.123 1.16e-09 ***
## xmid 2019.4656 1.4861 1358.932 < 2e-16 ***
## scal 4.8565 0.3338 14.551 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.068 on 1592 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 3.66e-06
Asym = fit$m$getAllPars()["Asym"]
xmid = fit$m$getAllPars()["xmid"]
scal = fit$m$getAllPars()["scal"]
(min(dat2$year)+(xmid-min(dat2$year))*100/Asym)
## xmid
## 2021.747
fit <- nls(IT.NET.USER.ZS ~ 100/(1+exp(-1*(year-2021.747)/scal)), data = dat2, start = list(scal=scal))
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ 100/(1 + exp(-1 * (year - 2021.747)/scal))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## scal 6.07748 0.09645 63.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.277 on 1594 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 2.677e-06
pyear <- seq(max(dat2$year), max(dat2$year)+150, 1)
myear <- min(pyear)+sum(predict(fit, newdata=list(year=pyear)) < 99.5)
pyear <- seq(myear-2,myear+2, 1)
df <- predict(fit, newdata=list(year=pyear), interval = "confidence")
names(df) <- pyear
(df)
## 2052 2053 2054 2055 2056
## 99.31585 99.41905 99.50675 99.58127 99.64458
pyear <- seq(min(dat2$year), 2058, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 4), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Logistic regression for low income countries
grp = income_grp[2]
dat2 <- get_data(dat, grp)
fit <- nls(IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal), data=dat2)
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ SSlogis(year, Asym, xmid, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Asym 63.6779 34.4079 1.851 0.0646 .
## xmid 2023.0045 3.7704 536.554 < 2e-16 ***
## scal 4.3987 0.5267 8.351 3.01e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.084 on 793 degrees of freedom
##
## Number of iterations to convergence: 3
## Achieved convergence tolerance: 8.246e-07
Asym = fit$m$getAllPars()["Asym"]
xmid = fit$m$getAllPars()["xmid"]
scal = fit$m$getAllPars()["scal"]
(min(dat2$year)+(xmid-min(dat2$year))*100/Asym)
## xmid
## 2041.83
fit <- nls(IT.NET.USER.ZS ~ 100/(1+exp(-1*(year-2041.83)/scal)), data = dat2, start = list(scal=scal))
smdls[[grp]] <- fit
summary(fit)
##
## Formula: IT.NET.USER.ZS ~ 100/(1 + exp(-1 * (year - 2041.83)/scal))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## scal 11.1687 0.1501 74.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.914 on 795 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 1.684e-06
pyear <- seq(max(dat2$year), max(dat2$year)+150, 1)
myear <- min(pyear)+sum(predict(fit, newdata=list(year=pyear)) < 99.5)
pyear <- seq(myear-2,myear+2, 1)
df <- predict(fit, newdata=list(year=pyear), interval = "confidence")
names(df) <- pyear
(df)
## 2099 2100 2101 2102 2103
## 99.40522 99.45588 99.50225 99.54468 99.58352
pyear <- seq(min(dat2$year), 2100, 1)
plot(dat2, xlim = c(min(pyear), max(pyear)), ylim = c(0,100),
pch=16, col="blue", main = grp, xaxt='n',yaxt='n', cex = 0.5)
axis(1, at = seq(min(pyear), max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
matlines(pyear, predict(fit, newdata=list(year=pyear), interval="confidence"), lwd=1.25)

Summary of logistic regression with average values
pyear <- seq(1990, 2100, 1)
plot(x=pyear, y=c(), xlim = c(min(pyear), max(pyear)), ylim=c(0,100),
ylab = "IT.NET.USER.ZS", xlab = "year", xaxt='n',yaxt='n')
axis(1, at = seq(min(pyear),max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
colors <- c("blue","red","orange","green")
names(colors) <- income_grp
for (grp in income_grp) {
gdat <- get_data(dat, grp, is.mean = T)
points(x=gdat$year, y=gdat$IT.NET.USER.ZS, pch=16, col=colors[grp], xaxt='n', yaxt='n', cex=0.75)
matlines(pyear, predict(smdls[[grp]], newdata=list(year=pyear), interval="confidence"), lwd=1)
}
legend("bottomright", legend=c("high income","low income","lower middle income","upper middle income"),
col=colors, lty=0, cex = 0.75, bg = "transparent", pch=16, box.lty=0)

pyear <- seq(1990, 2100, 1)
plot(x=pyear, y=c(), xlim = c(min(pyear), max(pyear)), ylim=c(0,100),
ylab = "IT.NET.USER.ZS", xlab = "year", xaxt='n',yaxt='n')
axis(1, at = seq(min(pyear),max(pyear), 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
axis(2, at = seq(0, 100, 5), tck = 1, lty = 4, col = "lightgray", lwd = 0.75)
colors <- c("blue","red","orange","green")
names(colors) <- income_grp
for (grp in income_grp) {
gdat <- get_data(dat, grp, is.mean = F)
points(x=gdat$year, y=gdat$IT.NET.USER.ZS, pch=16, col=colors[grp], xaxt='n', yaxt='n', cex=0.25)
matlines(pyear, col = colors[grp], predict(smdls[[grp]], newdata=list(year=pyear), interval="confidence"), lwd=1)
}
legend("bottomright", legend=c("high income","low income","lower middle income","upper middle income"),
col=colors, lty=0, cex = 0.75, bg = "transparent", pch=16, box.lty=0)
